(Attempt to control for hospitalization days attributable to other conditions than flu)
dat.003 = dat.003[dat.003$anydx != 1, ]
##
## Spearman's rank correlation rho
##
## data: valid$age and valid$tothospdays
## S = 984130, p-value = 1.948e-06
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.3245169
There is a positive correlation between age and duration of hospitalization.
##
## Spearman's rank correlation rho
##
## data: valid$age and valid$tothospdays
## S = 202550, p-value = 0.002142
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2787536
Results show a positive correlation between duration of hosp and age.
H1N1
H3N2
H1N1
No clear effect of season.
H3N2
No clear effect of season –> Try models that don’t include season and country as blocking vars
We know the relationship between age and probability of infection should be non-linear. Use cross-validation to verify, and to choose the number of degrees of freedom to include in a spline
Plot the overall test error rate vs. df
Use linear fit.
We know the relationship between age and probability of infection should be non-linear. Use cross-validation to verify, and to choose the number of degrees of freedom to include in a spline
Plot the overall test error rate vs. df
Conclusion: Use 2 df
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
##
## Argentina Australia Austria Belgium China Denmark Germany
## 8 15 2 3 5 26 8
## Greece Norway Peru Poland Singapore Spain Thailand
## 28 1 7 4 1 14 32
## UK USA
## 37 7
## [1] 198
Treat vaccination, antivial use, underlying symptoms, country and season as blocking variables with fixed effects
library(splines)
## H1N1 fit to medical history only (no effect of country, season, age or imprinting)
fit00 = glm(tothospdays ~ anyvac + anyav , data = train.H1N1)
summary(fit00)
##
## Call:
## glm(formula = tothospdays ~ anyvac + anyav, data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.592 -5.592 -4.336 -0.592 73.408
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.536 1.921 3.402 0.000829 ***
## anyvac1 -2.457 3.079 -0.798 0.426007
## anyav1 2.056 2.149 0.957 0.340003
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 138.0064)
##
## Null deviance: 24361 on 177 degrees of freedom
## Residual deviance: 24151 on 175 degrees of freedom
## AIC: 1387.2
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to country, season and medical history only (no effect of age or imprinting)
fit0 = glm(tothospdays ~ anyvac + anyav + country + season, data = train.H1N1)
summary(fit0)
##
## Call:
## glm(formula = tothospdays ~ anyvac + anyav + country + season,
## data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -30.268 -5.195 -1.317 1.871 52.581
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2323 6.3885 0.193 0.847313
## anyvac1 -2.1808 3.1186 -0.699 0.485473
## anyav1 4.1029 2.2118 1.855 0.065600 .
## countryAustralia -7.3290 5.2674 -1.391 0.166208
## countryAustria 15.7163 9.7232 1.616 0.108161
## countryBelgium -1.1329 8.6900 -0.130 0.896455
## countryChina 1.9765 8.1844 0.241 0.809512
## countryDenmark -0.9244 6.0594 -0.153 0.878956
## countryGermany 5.3378 6.8884 0.775 0.439642
## countryGreece -0.1969 6.0284 -0.033 0.973993
## countryNorway -0.3352 12.2766 -0.027 0.978253
## countryPeru 11.5772 7.4646 1.551 0.123065
## countryPoland -3.6089 8.0148 -0.450 0.653172
## countrySingapore -8.5219 14.1011 -0.604 0.546549
## countrySpain 4.0535 6.5270 0.621 0.535536
## countryThailand -7.5219 5.1082 -1.473 0.143019
## countryUK 1.7726 5.8337 0.304 0.761674
## countryUSA 2.5464 7.1331 0.357 0.721622
## seasonNH.10.11 4.5989 3.3854 1.358 0.176404
## seasonNH.11.12 -1.7786 12.0092 -0.148 0.882462
## seasonNH.12.13 25.9567 7.0358 3.689 0.000316 ***
## seasonNH.13.14 4.3108 3.3393 1.291 0.198750
## seasonNH.14.15 25.2416 6.1767 4.087 7.17e-05 ***
## seasonNH.15.16 2.3157 2.8054 0.825 0.410456
## seasonNH.16.17 5.1867 8.5055 0.610 0.542935
## seasonSH.10 6.6867 8.5055 0.786 0.433041
## seasonSH.11 6.5842 9.2856 0.709 0.479402
## seasonSH.13 9.9673 6.6713 1.494 0.137306
## seasonSH.14 19.8546 7.3322 2.708 0.007575 **
## seasonSH.16 5.3112 4.3697 1.215 0.226142
## seasonSH.17 4.1867 11.3985 0.367 0.713924
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 115.1654)
##
## Null deviance: 24361 on 177 degrees of freedom
## Residual deviance: 16929 on 147 degrees of freedom
## AIC: 1379.9
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to age only, plus fixed effects of vaccination, av use, underlying conditions, country and season
fit1 = glm(tothospdays ~ ns(age, knots = 65) + anyvac + anyav + country + season, data = train.H1N1)
summary(fit1)
##
## Call:
## glm(formula = tothospdays ~ ns(age, knots = 65) + anyvac + anyav +
## country + season, data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -30.137 -4.827 -1.107 2.042 53.322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.78506 6.81294 -0.409 0.683297
## ns(age, knots = 65)1 10.66178 5.86569 1.818 0.071182 .
## ns(age, knots = 65)2 4.73289 7.35609 0.643 0.520983
## anyvac1 -1.92200 3.12520 -0.615 0.539517
## anyav1 4.20919 2.21731 1.898 0.059639 .
## countryAustralia -5.56228 5.38307 -1.033 0.303188
## countryAustria 16.77486 9.69580 1.730 0.085737 .
## countryBelgium 1.36901 8.75488 0.156 0.875958
## countryChina 2.59938 8.15419 0.319 0.750354
## countryDenmark 0.30909 6.08108 0.051 0.959532
## countryGermany 7.25647 6.93447 1.046 0.297102
## countryGreece -0.05434 6.00439 -0.009 0.992792
## countryNorway 1.08135 12.25612 0.088 0.929816
## countryPeru 11.06801 7.48680 1.478 0.141487
## countryPoland -1.17634 8.08565 -0.145 0.884530
## countrySingapore -8.70823 14.15569 -0.615 0.539403
## countrySpain 4.51538 6.53235 0.691 0.490524
## countryThailand -6.52388 5.12924 -1.272 0.205445
## countryUK 3.12786 5.85923 0.534 0.594273
## countryUSA 3.18720 7.14145 0.446 0.656049
## seasonNH.10.11 4.30570 3.37332 1.276 0.203855
## seasonNH.11.12 0.66725 12.03208 0.055 0.955852
## seasonNH.12.13 25.14195 7.05756 3.562 0.000498 ***
## seasonNH.13.14 4.53638 3.32727 1.363 0.174872
## seasonNH.14.15 25.09532 6.15516 4.077 7.49e-05 ***
## seasonNH.15.16 1.85558 2.80403 0.662 0.509179
## seasonNH.16.17 5.19523 8.50961 0.611 0.542477
## seasonSH.10 8.01524 8.50943 0.942 0.347798
## seasonSH.11 7.20703 9.28035 0.777 0.438666
## seasonSH.13 10.66405 6.72724 1.585 0.115098
## seasonSH.14 20.31012 7.42238 2.736 0.006990 **
## seasonSH.16 4.91236 4.35758 1.127 0.261471
## seasonSH.17 4.35156 11.34880 0.383 0.701957
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 114.0787)
##
## Null deviance: 24361 on 177 degrees of freedom
## Residual deviance: 16541 on 145 degrees of freedom
## AIC: 1379.8
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to age plus imprinting, plus fixed effects of vaccination, av use, underlying conditions, country and season
fit2 = glm(tothospdays ~ ns(age, knots = 65) + p.g1.protection + anyvac + anyav + country + season, data = train.H1N1)
summary(fit2)
##
## Call:
## glm(formula = tothospdays ~ ns(age, knots = 65) + p.g1.protection +
## anyvac + anyav + country + season, data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -30.133 -4.843 -1.129 2.020 53.336
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.76723 6.86253 -0.403 0.687372
## ns(age, knots = 65)1 10.80940 7.68503 1.407 0.161714
## ns(age, knots = 65)2 4.85373 8.41711 0.577 0.565076
## p.g1.protection -0.09706 3.24884 -0.030 0.976207
## anyvac1 -1.91654 3.14136 -0.610 0.542758
## anyav1 4.20237 2.23669 1.879 0.062289 .
## countryAustralia -5.57467 5.41762 -1.029 0.305210
## countryAustria 16.78888 9.74068 1.724 0.086929 .
## countryBelgium 1.36612 8.78573 0.155 0.876651
## countryChina 2.62361 8.22254 0.319 0.750132
## countryDenmark 0.30929 6.10215 0.051 0.959647
## countryGermany 7.24344 6.97214 1.039 0.300587
## countryGreece -0.04880 6.02803 -0.008 0.993551
## countryNorway 1.05042 12.34209 0.085 0.932293
## countryPeru 11.05330 7.52884 1.468 0.144251
## countryPoland -1.17135 8.11537 -0.144 0.885436
## countrySingapore -8.76454 14.32923 -0.612 0.541730
## countrySpain 4.51935 6.55631 0.689 0.491736
## countryThailand -6.53670 5.16485 -1.266 0.207696
## countryUK 3.13385 5.88294 0.533 0.595061
## countryUSA 3.18101 7.16918 0.444 0.657921
## seasonNH.10.11 4.31268 3.39306 1.271 0.205768
## seasonNH.11.12 0.70217 12.13018 0.058 0.953920
## seasonNH.12.13 25.14070 7.08212 3.550 0.000521 ***
## seasonNH.13.14 4.53546 3.33894 1.358 0.176475
## seasonNH.14.15 25.08660 6.18337 4.057 8.11e-05 ***
## seasonNH.15.16 1.84625 2.83102 0.652 0.515344
## seasonNH.16.17 5.22616 8.60160 0.608 0.544423
## seasonSH.10 8.01965 8.54018 0.939 0.349277
## seasonSH.11 7.23035 9.34515 0.774 0.440376
## seasonSH.13 10.68226 6.77801 1.576 0.117216
## seasonSH.14 20.32178 7.45829 2.725 0.007233 **
## seasonSH.16 4.91872 4.37785 1.124 0.263074
## seasonSH.17 4.36440 11.39620 0.383 0.702307
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 114.8702)
##
## Null deviance: 24361 on 177 degrees of freedom
## Residual deviance: 16541 on 144 degrees of freedom
## AIC: 1381.8
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to age only, plus fixed effects of vaccination, av use, underlying conditions NO COUNTRY AND SEASON
fit3 = glm(tothospdays ~ ns(age, knots = 65) + anyvac + anyav , data = train.H1N1)
summary(fit3)
##
## Call:
## glm(formula = tothospdays ~ ns(age, knots = 65) + anyvac + anyav,
## data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.998 -5.281 -3.344 0.200 72.905
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.400 2.786 1.220 0.224
## ns(age, knots = 65)1 11.680 5.645 2.069 0.040 *
## ns(age, knots = 65)2 5.517 7.372 0.748 0.455
## anyvac1 -1.930 3.107 -0.621 0.535
## anyav1 1.828 2.154 0.848 0.397
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 136.1311)
##
## Null deviance: 24361 on 177 degrees of freedom
## Residual deviance: 23551 on 173 degrees of freedom
## AIC: 1386.7
##
## Number of Fisher Scoring iterations: 2
## H1N1 fit to age plus imprinting, plus fixed effects of vaccination, av use, underlying conditions, NO COUNTRY AND SEASON
fit4 = glm(tothospdays ~ ns(age, knots = 65) + p.g1.protection + anyvac + anyav, data = train.H1N1)
summary(fit4)
##
## Call:
## glm(formula = tothospdays ~ ns(age, knots = 65) + p.g1.protection +
## anyvac + anyav, data = train.H1N1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.407 -5.244 -3.438 0.109 73.618
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.083 2.869 1.075 0.284
## ns(age, knots = 65)1 9.323 7.483 1.246 0.214
## ns(age, knots = 65)2 3.435 8.561 0.401 0.689
## p.g1.protection 1.571 3.263 0.481 0.631
## anyvac1 -1.991 3.116 -0.639 0.524
## anyav1 1.943 2.172 0.894 0.372
##
## (Dispersion parameter for gaussian family taken to be 136.7383)
##
## Null deviance: 24361 on 177 degrees of freedom
## Residual deviance: 23519 on 172 degrees of freedom
## AIC: 1388.5
##
## Number of Fisher Scoring iterations: 2
## Plot the fits for each model side by side
par(mfrow = c(1, 2), las = 3, mar = c(6, 6, 6, 3), cex.axis = .7)
library(gam)
## Loading required package: foreach
## Loaded gam 1.14-4
plot.gam(fit00, se = T, col = 'dodgerblue', main = 'Simplified Baseline')
plot.gam(fit0, se = T, col = 'dodgerblue', main = 'Baseline')
plot.gam(fit1, se = T, col = 'dodgerblue', main = 'Baseline + age + imp')
plot.gam(fit2, se = T, col = 'dodgerblue', main = 'Baseline + age + imp')
plot.gam(fit3, se = T, col = 'dodgerblue', main = 'Simple baseline + age + imp')
plot.gam(fit4, se = T, col = 'dodgerblue', main = 'Simple baseline + age + imp')
pdf('003Tothospdays_H1N1.pdf')
plot.gam(fit4, se = T, col = 'dodgerblue', main = 'Simple baseline + age + imp')
dev.off()
## quartz_off_screen
## 2
# Predict probs for each patient
pred00 = predict(fit00, newdata = test.H1N1)
pred3 = predict(fit3, newdata = test.H1N1)
pred4 = predict(fit4, newdata = test.H1N1)
# Estimate the simulated error rate
MSE = numeric(6); names(MSE) = c('simple.baseline', 'simple.baseline+age', 'simple.baseline+age+imp')
MSE[1] = mean((pred00 - test.H1N1$tothospdays)^2)
MSE[2] = mean((pred3 - test.H1N1$tothospdays)^2)
MSE[3] = mean((pred4 - test.H1N1$tothospdays)^2)
sort(MSE)
## <NA> <NA> <NA>
## 0.00000 0.00000 0.00000
## simple.baseline+age+imp simple.baseline+age simple.baseline
## 77.55715 77.87408 80.51232
AIC = c(fit00$aic, fit0$aic, fit1$aic, fit2$aic, fit3$aic, fit4$aic); names(AIC) = c('simple.baseline', 'baseline', 'baeline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')
del.AIC = sort(AIC - min(AIC))
del.AIC
## baeline+age baseline baseline+age+imp
## 0.0000000 0.1259588 1.9988967
## simple.baseline+age simple.baseline simple.baseline+age+imp
## 6.8851472 7.3664140 8.6455012
load('master.AIC.table.H1N1.RData')
master.AIC.table['003Tothospdays', c('simple.baseline', 'baseline', 'baseline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')] = AIC - min(AIC)
save(master.AIC.table, file = 'master.AIC.table.H1N1.RData')
rm(master.AIC.table)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
## integer(0)
##
## Argentina Australia Belgium China Denmark Germany Peru
## 7 14 1 2 8 2 4
## Singapore Spain Thailand UK USA
## 1 2 50 12 12
## [1] 115
library(splines)
## H3N2 fit to medical history only (no effect of country, season, age or imprinting)
fit00 = glm(tothospdays ~ anyvac + anyav, data = train.H3N2)
summary(fit00)
##
## Call:
## glm(formula = tothospdays ~ anyvac + anyav, data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -7.918 -2.494 -1.494 -0.494 59.506
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.9183 1.9899 3.979 0.000133 ***
## anyvac1 -0.2311 2.0335 -0.114 0.909754
## anyav1 -3.4247 2.1310 -1.607 0.111287
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 61.02857)
##
## Null deviance: 6078.0 on 99 degrees of freedom
## Residual deviance: 5919.8 on 97 degrees of freedom
## AIC: 699.88
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to country, season and medical history only (no effect of age or imprinting)
fit0 = glm(tothospdays ~ anyvac + anyav + country + season, data = train.H3N2)
summary(fit0)
##
## Call:
## glm(formula = tothospdays ~ anyvac + anyav + country + season,
## data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -23.1545 -2.1049 -0.3286 1.1937 23.2288
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.5040 5.8944 1.782 0.07869 .
## anyvac1 0.2181 2.0477 0.106 0.91547
## anyav1 -2.8636 2.1853 -1.310 0.19395
## countryAustralia -15.1388 4.8277 -3.136 0.00243 **
## countryDenmark -8.3283 4.7897 -1.739 0.08607 .
## countryOther -1.4478 4.7126 -0.307 0.75950
## countryPeru -2.5050 5.4054 -0.463 0.64437
## countryThailand -9.9565 3.9042 -2.550 0.01275 *
## countryUK -9.6325 4.7011 -2.049 0.04387 *
## countryUSA -6.0500 4.9102 -1.232 0.22165
## seasonNH.12.13 1.9628 4.6814 0.419 0.67619
## seasonNH.13.14 3.3207 5.3483 0.621 0.53651
## seasonNH.14.15 3.1407 4.6702 0.672 0.50328
## seasonNH.15.16 5.3943 5.1282 1.052 0.29614
## seasonNH.16.17 6.1773 4.9334 1.252 0.21431
## seasonSH.10 34.6529 6.4265 5.392 7.39e-07 ***
## seasonSH.11 1.3596 7.1632 0.190 0.84996
## seasonSH.12 12.4984 8.4172 1.485 0.14167
## seasonSH.13 6.8796 6.0663 1.134 0.26028
## seasonSH.14 -2.1349 4.9166 -0.434 0.66534
## seasonSH.15 4.9001 5.0081 0.978 0.33093
## seasonSH.16 10.2498 5.4268 1.889 0.06269 .
## seasonSH.17 10.5085 5.4685 1.922 0.05835 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 40.55114)
##
## Null deviance: 6078.0 on 99 degrees of freedom
## Residual deviance: 3122.4 on 77 degrees of freedom
## AIC: 675.91
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to age only, plus fixed effects of vaccination, av use, underlying conditions, country and season
fit1 = glm(tothospdays ~ ns(age, knots = 65) + anyvac + anyav + country + season, data = train.H3N2)
summary(fit1)
##
## Call:
## glm(formula = tothospdays ~ ns(age, knots = 65) + anyvac + anyav +
## country + season, data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -19.9770 -2.5916 -0.1418 1.7009 20.8286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.6056 5.8599 2.322 0.02296 *
## ns(age, knots = 65)1 0.8211 3.7709 0.218 0.82822
## ns(age, knots = 65)2 9.3137 3.1174 2.988 0.00380 **
## anyvac1 -1.4179 2.0381 -0.696 0.48875
## anyav1 -2.7812 2.0921 -1.329 0.18774
## countryAustralia -15.2190 4.6322 -3.285 0.00155 **
## countryDenmark -8.5429 4.7058 -1.815 0.07346 .
## countryOther -2.2521 4.5698 -0.493 0.62357
## countryPeru -5.3021 5.3371 -0.993 0.32369
## countryThailand -10.1534 3.7805 -2.686 0.00891 **
## countryUK -10.3387 4.5860 -2.254 0.02709 *
## countryUSA -6.3205 4.7691 -1.325 0.18910
## seasonNH.12.13 -1.5773 4.6609 -0.338 0.73600
## seasonNH.13.14 1.9665 5.2408 0.375 0.70855
## seasonNH.14.15 0.9677 4.5742 0.212 0.83303
## seasonNH.15.16 2.5605 5.0225 0.510 0.61168
## seasonNH.16.17 4.3694 4.8023 0.910 0.36582
## seasonSH.10 29.7895 6.3831 4.667 1.31e-05 ***
## seasonSH.11 -1.1338 6.9292 -0.164 0.87046
## seasonSH.12 9.2825 8.1489 1.139 0.25828
## seasonSH.13 5.3059 5.8360 0.909 0.36618
## seasonSH.14 -2.4024 4.7144 -0.510 0.61184
## seasonSH.15 2.4069 4.9119 0.490 0.62555
## seasonSH.16 7.6984 5.2718 1.460 0.14838
## seasonSH.17 8.2961 5.3033 1.564 0.12195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 37.15256)
##
## Null deviance: 6078.0 on 99 degrees of freedom
## Residual deviance: 2786.4 on 75 degrees of freedom
## AIC: 668.52
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to age plus imprinting, plus fixed effects of vaccination, av use, underlying conditions, country and season
fit2 = glm(tothospdays ~ ns(age, knots = 65) + p.g2.protection + anyvac + anyav + country + season, data = train.H3N2)
summary(fit2)
##
## Call:
## glm(formula = tothospdays ~ ns(age, knots = 65) + p.g2.protection +
## anyvac + anyav + country + season, data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -19.8535 -2.6514 -0.2554 1.6159 20.8254
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.384 6.493 1.907 0.06037 .
## ns(age, knots = 65)1 2.885 5.973 0.483 0.63047
## ns(age, knots = 65)2 10.374 3.930 2.640 0.01011 *
## p.g2.protection 1.457 3.257 0.447 0.65599
## anyvac1 -1.441 2.050 -0.703 0.48426
## anyav1 -2.759 2.104 -1.311 0.19385
## countryAustralia -15.262 4.658 -3.276 0.00160 **
## countryDenmark -8.501 4.732 -1.796 0.07651 .
## countryOther -2.275 4.595 -0.495 0.62199
## countryPeru -5.133 5.379 -0.954 0.34308
## countryThailand -10.106 3.802 -2.658 0.00963 **
## countryUK -10.502 4.625 -2.271 0.02608 *
## countryUSA -6.236 4.798 -1.300 0.19775
## seasonNH.12.13 -1.864 4.730 -0.394 0.69466
## seasonNH.13.14 1.902 5.271 0.361 0.71930
## seasonNH.14.15 1.012 4.600 0.220 0.82640
## seasonNH.15.16 2.411 5.061 0.476 0.63516
## seasonNH.16.17 4.340 4.829 0.899 0.37170
## seasonSH.10 29.653 6.425 4.616 1.61e-05 ***
## seasonSH.11 -1.495 7.013 -0.213 0.83173
## seasonSH.12 9.395 8.197 1.146 0.25540
## seasonSH.13 5.089 5.887 0.864 0.39018
## seasonSH.14 -2.258 4.751 -0.475 0.63597
## seasonSH.15 2.369 4.939 0.480 0.63290
## seasonSH.16 7.677 5.300 1.448 0.15171
## seasonSH.17 8.130 5.345 1.521 0.13250
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 37.55311)
##
## Null deviance: 6078.0 on 99 degrees of freedom
## Residual deviance: 2778.9 on 74 degrees of freedom
## AIC: 670.25
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to age only, plus fixed effects of vaccination, av use, underlying conditions NO COUNTRY AND SEASON
fit3 = glm(tothospdays ~ ns(age, knots = 65) + anyvac + anyav, data = train.H3N2)
summary(fit3)
##
## Call:
## glm(formula = tothospdays ~ ns(age, knots = 65) + anyvac + anyav,
## data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.214 -2.549 -1.017 0.947 51.500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.588 2.355 3.647 0.000433 ***
## ns(age, knots = 65)1 1.444 4.067 0.355 0.723428
## ns(age, knots = 65)2 11.944 3.457 3.455 0.000824 ***
## anyvac1 -2.201 2.034 -1.082 0.282012
## anyav1 -3.514 2.028 -1.733 0.086392 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 55.24726)
##
## Null deviance: 6078.0 on 99 degrees of freedom
## Residual deviance: 5248.5 on 95 degrees of freedom
## AIC: 691.84
##
## Number of Fisher Scoring iterations: 2
## H3N2 fit to age plus imprinting, plus fixed effects of vaccination, av use, underlying conditions, NO COUNTRY AND SEASON
fit4 = glm(tothospdays ~ ns(age, knots = 65) + p.g2.protection + anyvac + anyav, data = train.H3N2)
summary(fit4)
##
## Call:
## glm(formula = tothospdays ~ ns(age, knots = 65) + p.g2.protection +
## anyvac + anyav, data = train.H3N2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -10.058 -2.446 -0.979 0.852 51.203
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.283 3.904 1.609 0.11091
## ns(age, knots = 65)1 4.968 6.264 0.793 0.42967
## ns(age, knots = 65)2 13.715 4.209 3.259 0.00156 **
## p.g2.protection 2.662 3.591 0.741 0.46042
## anyvac1 -2.184 2.039 -1.071 0.28693
## anyav1 -3.419 2.037 -1.679 0.09655 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 55.51057)
##
## Null deviance: 6078 on 99 degrees of freedom
## Residual deviance: 5218 on 94 degrees of freedom
## AIC: 693.26
##
## Number of Fisher Scoring iterations: 2
## Plot the fits for each model side by side
par(mfrow = c(1, 2), las = 3, mar = c(6, 6, 6, 3), cex.axis = .7)
library(gam)
plot.gam(fit00, se = T, col = 'firebrick1', main = 'Simplified Baseline')
plot.gam(fit0, se = T, col = 'firebrick1', main = 'Baseline')
plot.gam(fit1, se = T, col = 'firebrick1', main = 'Baseline + age + imp')
plot.gam(fit2, se = T, col = 'firebrick1', main = 'Baseline + age + imp')
plot.gam(fit3, se = T, col = 'firebrick1', main = 'Simple baseline + age + imp')
plot.gam(fit4, se = T, col = 'firebrick1', main = 'Simple baseline + age + imp')
pdf('003Tothospdays_H3N2.pdf')
plot.gam(fit4, se = T, col = 'firebrick1', main = 'Simple baseline + age + imp')
dev.off()
## quartz_off_screen
## 2
# Predict probs for each patient
pred00 = predict(fit00, newdata = test.H3N2)
pred3 = predict(fit3, newdata = test.H3N2)
pred4 = predict(fit4, newdata = test.H3N2)
# Estimate the simulated error rate
MSE = numeric(3); names(MSE) = c('simple.baseline', 'simple.baseline+age', 'simple.baseline+age+imp')
MSE[1] = mean((pred00 - test.H3N2$tothospdays)^2)
MSE[2] = mean((pred3 - test.H3N2$tothospdays)^2)
MSE[3] = mean((pred4 - test.H3N2$tothospdays)^2)
sort(MSE)
## simple.baseline simple.baseline+age+imp simple.baseline+age
## 81.01654 83.27925 83.35676
AIC = c(fit00$aic, fit0$aic, fit1$aic, fit2$aic, fit3$aic, fit4$aic); names(AIC) = c('simple.baseline', 'baseline', 'baseline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')
del.AIC = sort(AIC - min(AIC))
del.AIC
## baseline+age baseline+age+imp baseline
## 0.000000 1.730035 7.384847
## simple.baseline+age simple.baseline+age+imp simple.baseline
## 23.317484 24.734733 31.353222
load('master.AIC.table.H3N2.RData')
master.AIC.table['003Tothospdays', c('simple.baseline', 'baseline', 'baseline+age', 'baseline+age+imp', 'simple.baseline+age', 'simple.baseline+age+imp')] = AIC - min(AIC)
save(master.AIC.table, file = 'master.AIC.table.H3N2.RData')
rm(master.AIC.table)